* Reset the session and run the shared preparation script.
clear
set more off

* NOTE(review): preliminaries_2024 is assumed to leave the submission-level data in memory - confirm.
do preliminaries_2024

* Keep submissions with a non-missing normalized public score and teamsize below 3.
keep if pubscore_normal != .
keep if teamsize < 3

* Flag teamsize==2 rows where requestdate1 or requestdate2 is missing, then drop them.
* The flag stays in the data (all zeros) after the drop.
gen missing_dates = (requestdate1 == . | requestdate2 == .) & teamsize == 2
drop if missing_dates == 1

* Keep rewardquantity of at least 5000. Missing values compare as larger than
* any number in Stata, so rows with a missing rewardquantity are retained.
keep if rewardquantity >= 5000

* Treatment indicators.
*   treated      = 1 for rows with teamsize equal to 2, 0 otherwise
*   treated_post = 1 for treated rows whose submissiondate is at or after merger_time
gen treated = (teamsize == 2)
gen treated_post = (treated == 1 & submissiondate >= merger_time)

* Running maximum of pubscore_normal over all EARLIER rows of the same
* competition, with rows ordered by id.
sort competitionid id
* Seed the first row of each competition with a -1000 sentinel so the
* recursion below has a starting value.
by competitionid: gen maxscore = cond(_n == 1, -1000, .)
* replace works row by row, so maxscore[_n-1] is already the updated value.
by competitionid: replace maxscore = max(maxscore[_n-1], pubscore_normal[_n-1]) if _n > 1
* The first row has no earlier submission: remove the sentinel.
by competitionid: replace maxscore = . if _n == 1

* Indicator that this row's score exceeds the running maximum. On the first
* row of a competition maxscore is missing, which compares as larger than any
* number, so the indicator is 0 there.
gen maxscorechange = (pubscore_normal > maxscore)

*team rankings/sample selection (keep top 40, taking into account that some users don't appear making submissions)
* Best score of each team, kept only on the first row (by id) of every
* teamid-submitteduserid pair so that each pair is ranked once.
bys teamid: egen maxscore_team = max(pubscore_normal)
bys teamid submitteduserid (id): replace maxscore_team = . if _n>1
* Negate so that an ascending sort puts the best score first. Rows whose
* maxscore_team is missing sort last and therefore get the largest ranks.
replace maxscore_team = -maxscore_team
* Members of the same team share maxscore_team, and Stata orders tied sort
* keys randomly. The extra keys teamid, submitteduserid and id make the
* ranking (and hence the top-40 cut) identical from run to run.
bys competitionid (maxscore_team teamid submitteduserid id): gen rank = _n
gen rank40 = rank<=40

* Keep only competitions that contain at least one treated row.
by competitionid, sort: egen tteam = max(treated)
keep if tteam != 0

* q-function: logit of the score-change indicator on the running maximum
* score, the post-merger treatment indicator, the rank40 indicator and
* competition dummies (created by xi), with robust standard errors.
xi: logit maxscorechange maxscore treated_post rank40 i.competitionid , r
estimates store p
* pr2() is used because logit stores a pseudo R-squared rather than e(r2).
* The dollar signs in the title are backslash-escaped so that Stata does not
* expand them as a global macro and the LaTeX math markers reach the table.
esttab p , b(%8.3f) se(%8.3f) pr2(%8.3f) tex label keep(maxscore treated_post )  title("\$q\$-function estimates: Logit regression") star(* 0.1 ** 0.05 *** 0.01)

* Coefficients stored as constants on every row. beta0 is the intercept
* evaluated at rank40 = 1 for the omitted (base) competition category.
gen beta0 = _b[_cons]+_b[rank40]
gen beta1 = _b[maxscore]
gen beta1tr = _b[treated_post]

gen se1 = _se[maxscore]
gen se1tr = _se[treated_post]
* Standard error of the same linear combination as beta0, so that the base
* category is treated like the competition-specific values set in the loop.
quietly lincom _cons + rank40
gen se0 = r(se)

* Competition-specific intercepts and their standard errors.
* NOTE(review): the competitionid>2500 filter presumably skips the category
* omitted by xi, which has no dummy coefficient - confirm against the data.
levelsof competitionid if competitionid>2500, local(levels)
foreach l of local levels {
    replace beta0 = _b[_Icompetiti_`l'] + _b[_cons]+_b[rank40]  if competitionid==`l'
    quietly lincom _cons + _b[_Icompetiti_`l']+_b[rank40]
    * capture kept from the original: a failed replace leaves se0 unchanged.
    capture replace se0 = r(se) if competitionid==`l'
}

* Keep the rows ranked in the top 40 of their competition.
keep if rank<=40

*adjusting for failed teams
* Build the list of user ids present in the retained sample and keep the
* rows whose user id appears in it.
* The list is reduced to one row per user id before saving: joinby forms
* every pairwise combination of matching rows, so a user id listed several
* times would multiply that user's rows in the data.
* NOTE(review): the list is built from the very rows it filters, so every
* row matches. Confirm whether it was meant to be built before the rank cut.
preserve
keep submitteduserid
duplicates drop
save estimationsample_userids, replace
restore
joinby submitteduserid using estimationsample_userids, unmatched(both)
keep if _merge==3
drop _merge

* Sample selection after the top-40 cut: keep competitions that still have
* at least one treated row and at least one score change.
by competitionid, sort: egen tottreated = total(treated)
keep if tottreated != 0
by competitionid, sort: egen totchanges = total(maxscorechange)
keep if totchanges != 0

* Count treated teams per competition: auxtreated carries the treated flag
* on a single row per teamid, and totmergers adds those up by competition.
by teamid, sort: gen auxtreated = treated if _n == 1
by competitionid, sort: egen totmergers = total(auxtreated)

* Export the q-function results with one row per competition, with the
* competition id renamed to cid. collapse keeps only the by-variable and the
* listed statistics, and the data in memory are restored after each export.
* outsheet is called without the comma option, so the files are written
* tab-delimited despite the .csv extension.

* Point estimates.
preserve
collapse (last) beta0 beta1 beta1tr, by(competitionid)
rename competitionid cid
outsheet using q_estimates.csv, replace
restore

* Standard errors.
preserve
collapse (last) se0 se1 se1tr, by(competitionid)
rename competitionid cid
outsheet using q_inference.csv, replace
restore

* Attach competition-level information from the competitions dataset.
* competitionid is converted to string for the join and back to numeric
* afterwards (presumably the key is stored as a string there - confirm).
* unmatched(master) keeps rows that have no match in competitions.
tostring competitionid, replace
joinby competitionid using competitions, unmatched(master)
destring competitionid, replace

* Competition length in days, parsed from the first 10 characters of the
* deadline and enabled date strings, read as month-day-year.
* NOTE(review): assumes the date part always fits in those 10 characters - confirm.
gen deadline_dayaux = date(substr(deadlinedate, 1, 10), "MDY")
gen dateenabled_dayaux = date(substr(enableddate, 1, 10), "MDY")
gen length_days = deadline_dayaux - dateenabled_dayaux

* Keep only the variables written to the estimation sample.
keep competitionid teamid id maxscore pubscore_normal totmergers length_days rewardquantity

* Rows with a missing running maximum get a value 0.1 below the smallest
* maxscore observed in their competition. The helper variable is dropped
* explicitly so that it is not exported.
by competitionid, sort: egen minmaxscore = min(maxscore)
replace maxscore = minmaxscore - 0.1 if maxscore == .
drop minmaxscore

* Write the sample ordered by competition and id. outsheet is called without
* the comma option, so the file is tab-delimited despite the .csv extension.
sort competitionid id
*outsheet using estimation_sample.csv, replace
outsheet using estimation_sample40.csv, replace

